Personal Project: Predicting Hourly Wage
Introduction
This notebook builds and compares four machine learning models to investigate the factors influencing hourly wages, using the Beauty dataset from Introductory Econometrics: A Modern Approach (J.M. Wooldridge). The analysis is divided into two parts:
- Implement Ridge and Lasso Regression.
- Implement Random Forest and Neural Network, then provide a comparison of four models, and interpret relevant findings.
1. Ridge and Lasso Regression for Hourly Wage Prediction
1.1. Data Preparation
Data Loading
# Basic Setup and Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import root_mean_squared_error, r2_score
from IPython.display import display, HTML # Display options
# Load the dataset
beauty = pd.read_csv('BEAUTY.csv')
beauty.describe()
| | wage | lwage | belavg | abvavg | exper | looks | union | goodhlth | black | female | married | south | bigcity | smllcity | service | expersq | educ |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 | 1260.000000 |
| mean | 6.306690 | 1.658800 | 0.123016 | 0.303968 | 18.206349 | 3.185714 | 0.272222 | 0.933333 | 0.073810 | 0.346032 | 0.691270 | 0.174603 | 0.219048 | 0.466667 | 0.273810 | 474.482540 | 12.563492 |
| std | 4.660639 | 0.594508 | 0.328586 | 0.460152 | 11.963485 | 0.684877 | 0.445280 | 0.249543 | 0.261564 | 0.475892 | 0.462153 | 0.379778 | 0.413765 | 0.499086 | 0.446089 | 534.645425 | 2.624489 |
| min | 1.020000 | 0.019803 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 5.000000 |
| 25% | 3.707500 | 1.310357 | 0.000000 | 0.000000 | 8.000000 | 3.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 64.000000 | 12.000000 |
| 50% | 5.300000 | 1.667705 | 0.000000 | 0.000000 | 15.000000 | 3.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 225.000000 | 12.000000 |
| 75% | 7.695000 | 2.040570 | 0.000000 | 1.000000 | 27.000000 | 4.000000 | 1.000000 | 1.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 729.000000 | 13.000000 |
| max | 77.720000 | 4.353113 | 1.000000 | 1.000000 | 48.000000 | 5.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 2304.000000 | 17.000000 |
The dataset includes 1260 observations with wage information and 15 potential predictors. No missing values were found. Initial summary statistics reveal the target variable wage is right-skewed. Therefore, I also explore the distribution of wage and lwage below.
# Distribution Plot
plt.figure(figsize=(7, 3))
# 1. Distribution of 'wage'
plt.subplot(1, 2, 1) # 1 row, 2 columns, first plot
sns.histplot(beauty['wage'], kde=True, bins=30, color='skyblue') # Histogram with a density curve
plt.title('Distribution of Hourly Wage (wage)',size =10)
plt.xlabel('Hourly Wage',size =8)
plt.ylabel('Frequency',size =8)
# 2. Distribution of 'lwage'
plt.subplot(1, 2, 2) # 1 row, 2 columns, second plot
sns.histplot(beauty['lwage'], kde=True, bins=30, color='skyblue')
plt.title('Distribution of Log Hourly Wage (lwage)',size =10)
plt.xlabel('Log Hourly Wage',size =8)
plt.ylabel('Frequency',size =8)
plt.tight_layout() # Adjust layout to prevent overlap
plt.show()
The distribution plots confirm that wage is right-skewed, while lwage is closer to a normal distribution. Using the natural logarithm of wage (lwage) as the dependent variable for model training is therefore expected to yield better results. Final model evaluation is reported on both the log and absolute wage scales.
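Concretely, the models are trained on the log-transformed target,

$$\text{lwage} = \ln(\text{wage}),$$

and test-set predictions are mapped back to the wage scale via $\widehat{\text{wage}} = e^{\widehat{\text{lwage}}}$ (as done with np.exp in the evaluation cells below), so that errors can also be reported in dollars per hour.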
Data Splitting and Scaling
The data is split 70% training / 30% test. Since Ridge and Lasso coefficients are scale-sensitive, predictors are standardized using StandardScaler fitted only on the training data.
Feature selection: I exclude the looks variable, as its information is captured by the belavg and abvavg dummies, thus avoiding multicollinearity.
# Split data into 70% training and 30% test set
train_set, test_set = train_test_split(beauty, test_size=0.3, random_state=42)
# Define Features (Predictors) and Target
X_cols = ['belavg', 'abvavg', 'exper', 'union',
'goodhlth', 'black', 'female', 'married', 'south',
'bigcity', 'smllcity', 'service', 'expersq', 'educ'] # 14 predictors, excluding `looks`
y_col = ['lwage']
X_train_orig = train_set[X_cols]
y_train_orig = train_set[y_col].values.ravel() # lwage
X_test_orig = test_set[X_cols]
y_test_orig = test_set[y_col].values.ravel() # lwage
y_test_wage = test_set['wage'].values.ravel() # Actual wage
# Standardising the data using StandardScaler
scaler = StandardScaler()
# Fit on training data and transform both sets
X_train_scaled = scaler.fit_transform(X_train_orig)
X_test_scaled = scaler.transform(X_test_orig)
1.2. Model Implementation and Evaluation
Next, I implement Ridge and Lasso using RidgeCV and LassoCV respectively, both with 10-fold cross-validation to select the optimal regularization parameter (alpha).
Cross-Validated Ridge Regression
Ridge uses L2 regularization, shrinking coefficients towards zero but typically not setting them exactly to zero.
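For reference, with regularization strength alpha, Ridge solves (in scikit-learn's parameterization)

$$\hat{\beta}^{\,\text{ridge}} = \arg\min_{\beta}\; \sum_{i=1}^{n}\bigl(y_i - x_i^{\top}\beta\bigr)^2 + \alpha \sum_{j=1}^{p}\beta_j^{2},$$

which is why standardizing the predictors matters: the L2 penalty treats all coefficients on a common scale.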
# Cross Validated Ridge Regression
ridge_cv = RidgeCV(cv=10, scoring='neg_root_mean_squared_error') # Uses RidgeCV's default alpha grid (0.1, 1.0, 10.0)
ridge_cv.fit(X_train_scaled, y_train_orig)
# Predict and evaluate on lwage scale
y_test_pred = ridge_cv.predict(X_test_scaled)
RMSE_lwage_ridge = root_mean_squared_error(y_test_orig, y_test_pred)
r2_lwage_ridge = ridge_cv.score(X_test_scaled, y_test_orig) # R-squared on lwage
# Evaluate on wage scale
y_test_wage_pred = np.exp(y_test_pred) # Transform lwage to wage predictions
RMSE_wage_ridge = root_mean_squared_error(y_test_wage, y_test_wage_pred)
print(f"\n--- Ridge Results ---")
print(f"Ridge Best Alpha: {ridge_cv.alpha_}")
print(f"Ridge RMSE (lwage scale): {RMSE_lwage_ridge}")
print(f"Ridge R-squared (lwage scale): {r2_lwage_ridge}")
print(f"Ridge RMSE (wage scale): {RMSE_wage_ridge}")
# Extract Ridge coefficients
ridge_coeffs = pd.Series(ridge_cv.coef_, index=X_cols)
--- Ridge Results ---
Ridge Best Alpha: 1.0
Ridge RMSE (lwage scale): 0.4409644837921741
Ridge R-squared (lwage scale): 0.43367922539448
Ridge RMSE (wage scale): 3.5740573219647516
Cross-Validated LASSO
Lasso uses L1 regularization, which can shrink coefficients exactly to zero, performing automatic feature selection.
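For comparison, Lasso solves (again in scikit-learn's parameterization, with a $\tfrac{1}{2n}$ factor on the squared loss)

$$\hat{\beta}^{\,\text{lasso}} = \arg\min_{\beta}\; \frac{1}{2n}\sum_{i=1}^{n}\bigl(y_i - x_i^{\top}\beta\bigr)^2 + \alpha \sum_{j=1}^{p}\lvert\beta_j\rvert,$$

and the kink of the L1 penalty at zero is what allows some coefficients to be shrunk exactly to zero.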
# Cross Validated Lasso Regression
lasso_cv = LassoCV(cv=10, random_state=42) # Added random_state for reproducible cross-validation fold generation
lasso_cv.fit(X_train_scaled, y_train_orig)
# Predict and evaluate on lwage scale
y_test_pred = lasso_cv.predict(X_test_scaled)
RMSE_lwage_lasso = root_mean_squared_error(y_test_orig, y_test_pred)
r2_lwage_lasso = lasso_cv.score(X_test_scaled, y_test_orig) # R-squared on lwage
# Evaluate on wage scale
y_test_wage_pred = np.exp(y_test_pred) # Transform lwage to wage predictions
RMSE_wage_lasso = root_mean_squared_error(y_test_wage, y_test_wage_pred)
print(f"\n--- Lasso Results ---")
print(f"Lasso Best Alpha: {lasso_cv.alpha_}")
print(f"Lasso RMSE (lwage scale): {RMSE_lwage_lasso}")
print(f"Lasso R-squared (lwage scale): {r2_lwage_lasso}")
print(f"Lasso RMSE (wage scale): {RMSE_wage_lasso}")
# Extract Lasso Feature Selection
lasso_coeffs = pd.Series(lasso_cv.coef_, index=X_cols)
non_zero_coeffs = np.sum(lasso_coeffs != 0)
print(f"\nNumber of features used by Lasso: {non_zero_coeffs} out of {len(X_cols)}")
--- Lasso Results ---
Lasso Best Alpha: 0.0005055586885587609
Lasso RMSE (lwage scale): 0.44101492477569104
Lasso R-squared (lwage scale): 0.4335496575417561
Lasso RMSE (wage scale): 3.5744144564418434

Number of features used by Lasso: 14 out of 14
1.3. Comparison and Summary
# Compare Ridge and Lasso performance
results_data = {
'RMSE (lwage scale)': [RMSE_lwage_ridge, RMSE_lwage_lasso],
'RMSE (wage scale)': [RMSE_wage_ridge, RMSE_wage_lasso],
'R-squared (lwage scale)': [r2_lwage_ridge, r2_lwage_lasso],
'Best Alpha': [ridge_cv.alpha_, lasso_cv.alpha_],
'Features Used': [len(X_cols), non_zero_coeffs] # Show total for Ridge, selected for Lasso
}
results_df = pd.DataFrame(results_data, index=['Ridge', 'Lasso'])
print ("The performance metrics and coefficients for Ridge and Lasso are compared below:\n")
display(results_df.style.format())
# Compare coefficients
print ("\n--- Coefficients Comparison (on lwage scale) ---")
coefficients_df = pd.DataFrame({
'Ridge Coefficient': ridge_coeffs,
'Lasso Coefficient': lasso_coeffs
})
# Format for readability
coefficients_df_formatted = coefficients_df.style.format('{:.4f}')
display(coefficients_df_formatted)
The performance metrics and coefficients for Ridge and Lasso are compared below:
| | RMSE (lwage scale) | RMSE (wage scale) | R-squared (lwage scale) | Best Alpha | Features Used |
|---|---|---|---|---|---|
| Ridge | 0.440964 | 3.574057 | 0.433679 | 1.000000 | 14 |
| Lasso | 0.441015 | 3.574414 | 0.433550 | 0.000506 | 14 |
--- Coefficients Comparison (on lwage scale) ---
| | Ridge Coefficient | Lasso Coefficient |
|---|---|---|
| belavg | -0.0386 | -0.0383 |
| abvavg | 0.0110 | 0.0105 |
| exper | 0.4722 | 0.4708 |
| union | 0.0737 | 0.0733 |
| goodhlth | 0.0199 | 0.0195 |
| black | -0.0159 | -0.0154 |
| female | -0.1732 | -0.1733 |
| married | 0.0189 | 0.0184 |
| south | 0.0316 | 0.0312 |
| bigcity | 0.0998 | 0.0990 |
| smllcity | 0.0399 | 0.0391 |
| service | -0.0537 | -0.0532 |
| expersq | -0.3230 | -0.3215 |
| educ | 0.1823 | 0.1823 |
Summary
In the tables above, both models demonstrate very similar predictive performance. Based on RMSE and R², Ridge performs marginally better, but the difference is negligible.
I manually excluded the redundant looks variable before modelling, and Lasso did not identify further redundancy (it kept all 14 features). Lasso shrank coefficients (like Ridge) but did not exclude any variables, so the coefficients of the two models are very similar for all predictors.
For Part 1, Ridge and Lasso are therefore similarly effective for predicting hourly wage.
2. Random Forest and Neural Network for Hourly Wage Prediction
Next, two more complex models, Random Forest and Neural Network, are implemented.
The same 70/30 train/test split established earlier is used for modeling the lwage variable.
2.1. Random Forest Regression
Random Forest builds multiple decision trees and averages their predictions. It is effective at capturing non-linear relationships and feature interactions while being robust against overfitting compared to single trees.
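Formally, the forest's prediction is the average of $B$ trees grown on bootstrap samples,

$$\hat{f}_{\text{RF}}(x) = \frac{1}{B}\sum_{b=1}^{B} T_b(x),$$

with random feature subsampling at each split (the max_features hyperparameter tuned below) helping to decorrelate the trees.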
# Import necessary packages
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
# Use the original train_set, test_set in part 1
# X_train_orig, y_train_orig
# X_test_orig, y_test_orig
# y_test_wage
Implement Random Forest
I use a Pipeline with StandardScaler and RandomForestRegressor. This ensures that scaling is correctly handled within each fold of the cross-validation process. GridSearchCV with 5-fold cross-validation is used to find the optimal hyperparameters.
# Create the Pipeline
rfr_pipeline = Pipeline([
('scaler', StandardScaler()), # Step 1: Scale the data
('rfr', RandomForestRegressor(random_state=42)) # Step 2: Random Forest
])
# Define Hyperparameter Grid
param_grid = {
'rfr__n_estimators': [100, 200, 300],
'rfr__max_features': ['log2', 'sqrt'],
'rfr__max_depth': [15, 25, None],
'rfr__criterion' : ['squared_error'],
'rfr__min_samples_leaf' : [1,3,5],
'rfr__ccp_alpha' : [0, 0.01]
}
# Run Grid Search: 5-fold cross-validation and minimize RMSE
grid_search_rf = GridSearchCV(estimator=rfr_pipeline, param_grid=param_grid, cv=5,
scoring='neg_root_mean_squared_error', return_train_score=True, verbose=1)
# Fit model
grid_search_rf.fit(X_train_orig, y_train_orig)
best_rf_pipe = grid_search_rf.best_estimator_
# Optimal parameters
print("\nBest RF Pipeline Parameters found:")
print(grid_search_rf.best_params_)
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best RF Pipeline Parameters found:
{'rfr__ccp_alpha': 0, 'rfr__criterion': 'squared_error', 'rfr__max_depth': 25, 'rfr__max_features': 'log2', 'rfr__min_samples_leaf': 3, 'rfr__n_estimators': 300}
Evaluate Random Forest
Feature importance is analyzed using the feature_importances_ attribute (impurity-based importance, i.e. the mean decrease in the squared-error splitting criterion across the trees).
# Analyze Feature Importance
best_rfr_model = best_rf_pipe.named_steps['rfr']
importances_rf = best_rfr_model.feature_importances_
# Create a DataFrame for visualization
feature_importance_rf_df = pd.DataFrame({'Feature': X_cols, 'Importance': importances_rf
}).sort_values(by='Importance', ascending=False).reset_index(drop=True)
# Plot feature importances
plt.figure(figsize=(6, 4))
ax = sns.barplot(x='Importance', y='Feature', data=feature_importance_rf_df, color='skyblue')
for i, (importance, feature) in enumerate(zip(feature_importance_rf_df['Importance'], feature_importance_rf_df['Feature'])):
ax.text(importance + 0.005, i, f"{importance:.3f}", va='center', fontsize=9)
plt.title('Feature Importances from Random Forest',size =12)
plt.xlabel('Importance Score', size=10)
plt.ylabel('Feature', size=10)
plt.tight_layout()
plt.show()
# Predict and evaluate on lwage scale
y_test_pred = best_rf_pipe.predict(X_test_orig)
RMSE_lwage_rf = root_mean_squared_error(y_test_orig, y_test_pred)
r2_lwage_rf = best_rf_pipe.score(X_test_orig, y_test_orig) # R-squared on lwage
# Evaluate on the original wage scale
y_test_wage_pred = np.exp(y_test_pred) # Transform lwage to wage predictions
RMSE_wage_rf = root_mean_squared_error(y_test_wage, y_test_wage_pred)
print(f"\n--- Random Forests Results ---")
print(f"Random Forest RMSE (log wage scale): {RMSE_lwage_rf}")
print(f"Random Forest R-squared (lwage scale): {r2_lwage_rf}")
print(f"Random Forest RMSE (wage scale): {RMSE_wage_rf}")
--- Random Forests Results ---
Random Forest RMSE (log wage scale): 0.4496394322341057
Random Forest R-squared (lwage scale): 0.41117796879429824
Random Forest RMSE (wage scale): 3.592793131443144
Random Forest Summary: The model explains about 41% of the variance in log wages. The variables female, exper, expersq, and educ were identified as the most influential predictors, collectively accounting for ~74% of the total importance score in this model.
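As a possible robustness check on these impurity-based rankings (which can be biased toward features with many distinct values), permutation importance on the held-out test set could also be computed. This was not part of the analysis above; a minimal sketch using the fitted best_rf_pipe would be:
# Permutation importance: shuffle each feature on the test set and measure the drop in R-squared
from sklearn.inspection import permutation_importance
perm_result = permutation_importance(best_rf_pipe, X_test_orig, y_test_orig,
                                     scoring='r2', n_repeats=10, random_state=42)
perm_importance_df = pd.DataFrame({'Feature': X_cols,
                                   'Importance': perm_result.importances_mean
                                   }).sort_values(by='Importance', ascending=False)
print(perm_importance_df)
Broadly similar rankings from the two methods would strengthen the conclusion about the most influential predictors.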
2.2. Neural Network (NN) Regression
The second additional model explored is a Neural Network. A single validation-set approach is used: the training data from Part 1 is further split (75% NN-train / 25% NN-validation), and features are scaled using a StandardScaler fitted only on the NN-training subset.
import tensorflow as tf
from tensorflow import keras
# Use the original train_set, test_set in part 1
# X_train_orig, y_train_orig
# X_test_orig, y_test_orig
# y_test_wage
# Using 25% of the X_train_orig for validation
X_train, X_valid, y_train, y_valid = train_test_split(X_train_orig, y_train_orig, test_size=0.25, random_state=42)
# Scale the features with a separate scaler for the NN split
# (kept distinct from the Part 1 scaler, which is reused later for the Ridge partial-effect plot)
scaler_nn = StandardScaler()
X_train_scaled = scaler_nn.fit_transform(X_train)
X_valid_scaled = scaler_nn.transform(X_valid)
X_test_scaled = scaler_nn.transform(X_test_orig)
Build the Neural Network Model
After manually trying some combinations of hyperparameters, a sequential Keras model is defined with:
- An input layer (14 features).
- One hidden layer with 36 neurons using ReLU activation function.
- An output layer with a single neuron for regression.
# Build the Neural Network Model
model_nn = keras.models.Sequential([
keras.layers.InputLayer(shape=[X_train.shape[1]]),
keras.layers.Dense(36, activation="relu"), # first hidden layer
keras.layers.Dense(1) # output layer
])
model_nn.summary() # Print model architecture
Model: "sequential_1"
βββββββββββββββββββββββββββββββββββ³βββββββββββββββββββββββββ³ββββββββββββββββ β Layer (type) β Output Shape β Param # β β‘βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© β dense_2 (Dense) β (None, 36) β 540 β βββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββΌββββββββββββββββ€ β dense_3 (Dense) β (None, 1) β 37 β βββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββ΄ββββββββββββββββ
Total params: 577 (2.25 KB)
Trainable params: 577 (2.25 KB)
Non-trainable params: 0 (0.00 B)
Compile and Train Model
The model is compiled with MSE loss and the Adam optimizer (learning rate 0.001), and trained with early stopping.
# Compile the model
model_nn.compile(loss="mean_squared_error",
optimizer=keras.optimizers.Adam(learning_rate=0.001), # Adam optimizer
metrics=[tf.keras.metrics.RootMeanSquaredError()])
# Train the model with early stopping
early_stopping_cb = keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True, monitor='val_loss', verbose=1)
history = model_nn.fit(X_train_scaled , y_train , epochs=100
, validation_data=(X_valid_scaled , y_valid)
, callbacks=[early_stopping_cb]
)
Epoch 1/100: loss: 2.5352 - root_mean_squared_error: 1.5915 - val_loss: 1.5780 - val_root_mean_squared_error: 1.2562
[... per-epoch log truncated: training and validation losses decrease steadily ...]
Epoch 68/100: loss: 0.1247 - root_mean_squared_error: 0.3528 - val_loss: 0.3193 - val_root_mean_squared_error: 0.5650
[...]
Epoch 83/100: loss: 0.1416 - root_mean_squared_error: 0.3762 - val_loss: 0.3213 - val_root_mean_squared_error: 0.5669
Epoch 83: early stopping
Restoring model weights from the end of the best epoch: 68.
Monitor Training Process
Neural networks have many tunable hyperparameters. While an exhaustive search is beyond the scope here, monitoring the training process helps assess the suitability of the chosen configuration.
import pandas as pd
import matplotlib.pyplot as plt
pd.DataFrame(history.history).plot(figsize=(6,4))
plt.grid(True)
plt.title("NN Training History (MSE & RMSE on lwage scale)")
plt.xlabel("Epoch")
plt.ylabel("Metric Value")
plt.show()
Evaluate Neural Network
# Evaluate on lwage scale
results_nn = model_nn.evaluate(X_test_scaled, y_test_orig)
y_test_pred = model_nn.predict(X_test_scaled).flatten() # Flatten output
RMSE_lwage_nn = results_nn[1]
r2_lwage_nn = r2_score(y_test_orig, y_test_pred)
# Make predictions and Evaluate on the wage scale
y_test_wage_pred = np.exp(y_test_pred)
RMSE_wage_nn = root_mean_squared_error(y_test_wage, y_test_wage_pred)
print(f"\n--- Neural Networks Results ---")
print(f"Neural Networks RMSE (log wage scale): {results_nn[1]}") # Index 1 corresponds to RootMeanSquaredError metric
print(f"Neural Networks R-squared (lwage scale): {r2_lwage_nn}")
print(f"Neural Networks RMSE (wage scale): {RMSE_wage_nn}")
12/12 - 0s - loss: 0.2210 - root_mean_squared_error: 0.4696
--- Neural Networks Results ---
Neural Networks RMSE (log wage scale): 0.4596092700958252
Neural Networks R-squared (lwage scale): 0.3847765523523943
Neural Networks RMSE (wage scale): 3.6098844297052466
Neural Network Summary: The model explains about 38% of the variance in log wages. The training-history plot above shows training and validation losses decreasing steadily; early stopping halted training at epoch 83 and restored the best weights from epoch 68, limiting overfitting. While this architecture performs reasonably, further hyperparameter tuning could potentially improve results (a minimal manual search is sketched below).
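Such a search, for illustration only, could loop over a few hidden-layer widths and keep the one with the lowest validation RMSE; the candidate widths (16, 36, 64) and the reduced patience below are arbitrary choices, not part of the original tuning:
# Hedged sketch: compare a few hidden-layer widths on the validation split
best_units, best_val_rmse = None, np.inf
for units in [16, 36, 64]:
    candidate = keras.models.Sequential([
        keras.layers.InputLayer(shape=[X_train.shape[1]]),
        keras.layers.Dense(units, activation="relu"),
        keras.layers.Dense(1)
    ])
    candidate.compile(loss="mean_squared_error",
                      optimizer=keras.optimizers.Adam(learning_rate=0.001),
                      metrics=[tf.keras.metrics.RootMeanSquaredError()])
    candidate.fit(X_train_scaled, y_train, epochs=100, verbose=0,
                  validation_data=(X_valid_scaled, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])
    val_rmse = candidate.evaluate(X_valid_scaled, y_valid, verbose=0)[1]  # index 1 = RMSE metric
    if val_rmse < best_val_rmse:
        best_units, best_val_rmse = units, val_rmse
print(f"Best hidden-layer width: {best_units} (validation RMSE: {best_val_rmse:.4f})")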
2.3. Model Comparison and Final Selection
We compare all four models based on RMSE (on both the log-wage and wage scales) and R-squared.
# Consolidate results into a dictionary
results_data = {
'RMSE (lwage scale)': [RMSE_lwage_ridge, RMSE_lwage_lasso, RMSE_lwage_rf, RMSE_lwage_nn],
'RMSE (wage scale)': [RMSE_wage_ridge, RMSE_wage_lasso, RMSE_wage_rf, RMSE_wage_nn],
'R-squared (lwage scale)': [r2_lwage_ridge, r2_lwage_lasso,r2_lwage_rf, r2_lwage_nn],
}
# Create DataFrame
results_df = pd.DataFrame(results_data, index=['Ridge', 'Lasso', 'Random Forests', 'Neural Networks'])
print ("The performance of four models is summarized below:\n")
display(results_df.style.format())
The performance of four models is summarized below:
| | RMSE (lwage scale) | RMSE (wage scale) | R-squared (lwage scale) |
|---|---|---|---|
| Ridge | 0.440964 | 3.574057 | 0.433679 |
| Lasso | 0.441015 | 3.574414 | 0.433550 |
| Random Forests | 0.449639 | 3.592793 | 0.411178 |
| Neural Networks | 0.459609 | 3.609884 | 0.384777 |
Discussion:
- Comparing the RMSE values, the regularized linear models (Ridge and Lasso) exhibit the best predictive performance, slightly outperforming Random Forest and Neural Network.
- This outcome suggests that the underlying relationship between the predictors and log-wage is reasonably well approximated by a linear specification, consistent with the Mincer earnings function (Mincer, 1974), written out after this list. The inclusion of expersq likely captures the expected non-linearity, so the more complex non-linear models (Random Forest and Neural Network) did not yield a significant predictive advantage here.
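For reference, a standard form of the Mincer specification is

$$\ln(\text{wage}) = \beta_0 + \beta_1\,\text{educ} + \beta_2\,\text{exper} + \beta_3\,\text{exper}^2 + \varepsilon,$$

which the regularized linear models here essentially extend with additional demographic and job-related controls.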
Overall Preferred Model:
- Both Ridge and Lasso are strong candidates. The exclusion of the looks variable may have already addressed the multicollinearity concern, so Lasso did not perform further feature exclusion, and the potential advantage of Lasso's sparsity is not realized here. Therefore, for this dataset, Ridge is marginally preferred: it achieved the lowest RMSE and the highest R-squared, though the differences are negligible.
Ridge Coefficients: The coefficient plot below (left panel) illustrates the estimated impact of each standardized predictor on log-wage.
- The bar chart shows that exper (experience), expersq (experience squared), educ (education), and female are the most influential predictors.
- As expected from the Mincer function, exper (0.47) and educ (0.18) have positive coefficients, indicating that more experience and education are associated with higher wages.
- The negative coefficient on expersq (-0.32) captures the diminishing returns to experience: wages increase with experience, but at a decreasing rate. This concave relationship is visualized in the partial-effect plot below (right panel), and the implied turning point is written out after this list.
- The negative coefficient for female (-0.17) suggests that, holding other factors constant, women earn substantially less than men. (Because the predictors are standardized, this coefficient reflects a one-standard-deviation change in the female dummy rather than a direct percentage wage gap.)
- Being a union member (0.07) and living in a bigcity (0.10) are associated with higher wages.
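To make the concavity concrete: with unstandardized coefficients on exper and expersq, predicted log-wage satisfies

$$\frac{\partial\, \widehat{\text{lwage}}}{\partial\, \text{exper}} = \beta_{\text{exper}} + 2\,\beta_{\text{expersq}}\,\text{exper} = 0 \;\Rightarrow\; \text{exper}^{*} = -\frac{\beta_{\text{exper}}}{2\,\beta_{\text{expersq}}},$$

so wages rise with experience up to exper* and fall thereafter. (The coefficients reported above are on standardized predictors, so they would need to be rescaled by the training standard deviations before computing exper* in years.)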
fig, axes = plt.subplots(1, 2, figsize=(18, 8)) # 1 row, 2 columns
# --- Plot 1: Ridge Coefficients ---
ridge_coeffs_sorted = ridge_coeffs.sort_values()
sns.barplot(x=ridge_coeffs_sorted.values, y=ridge_coeffs_sorted.index,
ax=axes[0], hue=ridge_coeffs_sorted.index, palette="coolwarm_r")
axes[0].axvline(0, color='black', linewidth=0.8, linestyle='--')
for i, value in enumerate(ridge_coeffs_sorted.values):
axes[0].text(value + 0.01 * np.sign(value), i, f"{value:.2f}",
va='center', ha='left' if value > 0 else 'right', fontsize=9)
axes[0].set_title('Ridge Regression Coefficients\n(Impact on log-wage)')
axes[0].set_xlabel('Coefficient Value')
axes[0].set_ylabel('Predictors')
# --- Plot 2: Relationship between Experience and Hourly Wage ---
X_train_unscaled_df = pd.DataFrame(X_train_orig, columns=X_cols)
y_train_unscaled = y_train_orig
# 1. Create experience range
exper_min = X_train_unscaled_df['exper'].min()
exper_max = X_train_unscaled_df['exper'].max()
exper_range = np.linspace(exper_min, exper_max, 100)
expersq_range = exper_range**2
# 2. Create data for prediction, holding others constant at mean
means_other_vars = X_train_unscaled_df.drop(columns=['exper', 'expersq']).mean()
X_plot_unscaled = pd.DataFrame([means_other_vars.values] * len(exper_range), columns=means_other_vars.index)
# Add exper/expersq columns
X_plot_unscaled['exper'] = exper_range
X_plot_unscaled['expersq'] = expersq_range
X_plot_unscaled = X_plot_unscaled[X_cols] # Reorder to match original columns
# 3. Scale the plotting data with the Part 1 scaler (fitted on the full training set, matching ridge_cv)
X_plot_scaled = scaler.transform(X_plot_unscaled)
# 4. Predict lwage using the scaled data
lwage_pred_curve = ridge_cv.predict(X_plot_scaled)
# 5. Create plot
axes[1].scatter(X_train_unscaled_df['exper'], y_train_unscaled, alpha=0.2, label='Training Data Points', color='skyblue')
axes[1].plot(exper_range, lwage_pred_curve, label='Predicted log(wage) (Ridge Fit)', color='red', linewidth=2.5)
axes[1].axhline(y=np.mean(y_train_unscaled), color='gray', linestyle='--', label='Mean log(wage) of Training Data')
axes[1].set_title('Partial Effect of Experience on Predicted log(Wage)')
axes[1].set_xlabel('Years of Experience (exper)')
axes[1].set_ylabel('Predicted Log Hourly Wage (lwage)')
axes[1].grid(True)
axes[1].legend()
# --- Final Figure Adjustments ---
plt.tight_layout()
plt.show()
Reference
- Mincer, J., 1974. Schooling, experience, and earnings. New York: Columbia University Press.